import pandas as pd
import statsmodels.formula.api as smf
import numpy as np

# Load data
df = pd.read_csv('cleaned_with_male_head.csv')
df['CHILD'] = df['CHILD'].astype('category')
df['H_TYPE'] = df['H_TYPE'].astype('category')
df['year'] = df['year'].astype('category')

# Demean function (same as above)
def demean(df, vars_to_demean, group_col='code_id_hh'):
    df_demean = df.copy()
    for var in vars_to_demean:
        if var in df_demean.columns:
            group_mean = df_demean.groupby(group_col)[var].transform('mean')
            df_demean[var] = df_demean[var] - group_mean
    return df_demean

vars_to_demean = ['fert', 'mar', 'emp', 'pri', 'sec', 'high', 'inc', 'fam']
df_demean = demean(df, vars_to_demean)

# Model
formula = 'fert ~ C(H_TYPE) + C(CHILD) + mar + emp + pri + sec + high + inc + fam + C(year)'
model = smf.ols(formula, data=df_demean).fit(cov_type='cluster', cov_kwds={'groups': df_demean['code_id_hh']})

# Get coeff and 95% CI (same as above)
def get_param_info(model, param):
    coeff = model.params.get(param, np.nan)
    se = model.bse.get(param, np.nan)
    if np.isnan(coeff):
        return "N/A"
    ci_low = coeff - 1.96 * se
    ci_high = coeff + 1.96 * se
    return f"{round(coeff, 4)} ({round(ci_low, 4)} to {round(ci_high, 4)})"

# Build table
table7_data = {
    'Variable': [
        'Household structure^a:',
        '- Living with grandfather',
        '- Living with grandmother',
        '- Living with both',
        'Sex of children^b:',
        '- At least 1 son',
        '- At least 1 daughter',
        '- At least 1 son and 1 daughter',
        'Proportion of married women',
        'Proportion of employed women',
        'Proportion of women with:',
        '- Primary education',
        '- Secondary education',
        '- High school education',
        'Household income',
        'Household size',
        'Constant',
        'Observations',
        'Households',
        'Individual FE',
        'Year FE',
        'R-squared'
    ],
    'No. of children under 3': [
        '',
        get_param_info(model, 'C(H_TYPE)[T.1]'),
        get_param_info(model, 'C(H_TYPE)[T.2]'),
        get_param_info(model, 'C(H_TYPE)[T.3]'),
        '',
        get_param_info(model, 'C(CHILD)[T.1]'),
        get_param_info(model, 'C(CHILD)[T.2]'),
        get_param_info(model, 'C(CHILD)[T.3]'),
        get_param_info(model, 'mar'),
        get_param_info(model, 'emp'),
        '',
        get_param_info(model, 'pri'),
        get_param_info(model, 'sec'),
        get_param_info(model, 'high'),
        get_param_info(model, 'inc'),
        get_param_info(model, 'fam'),
        get_param_info(model, 'Intercept'),
        int(model.nobs),
        df['code_id_hh'].nunique(),
        'Yes',
        'Yes',
        round(model.rsquared, 3)
    ]
}
table7 = pd.DataFrame(table7_data)
print("\nRevised Table 7:\n", table7)
table7.to_csv('revise_table_7.csv', index=False)